package com.flejay.dreamrecruiter.tools.converter;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

import com.flejay.dreamrecruiter.model.CurriculumVitae;

/**
 * Utility that converts a PDF file into a {@link CurriculumVitae}.
 * <p>
 * The class only exposes static methods and cannot be instantiated.
 * 
 * @author Victor Fleurant
 *
 */
public final class PdfFileConverterTool{

	/**
	 * Private constructor: this class is a static utility holder.
	 */
	private PdfFileConverterTool(){
		super();
	}
	
	/**
	 * Converts the given PDF file into a {@link CurriculumVitae}.
	 * <p>
	 * The text extracted from the PDF becomes the content of the CV and the
	 * file name (as returned by {@link File#getName()}) becomes its name.
	 * 
	 * @param file the PDF file to convert; must not be {@code null} and must
	 *             denote an existing regular file
	 * @return a new {@link CurriculumVitae} filled with the text and the name of the file
	 * @throws NullPointerException if {@code file} is {@code null}
	 * @throws FileNotFoundException if {@code file} does not denote an existing regular file
	 * @throws IOException if the PDF cannot be read or parsed
	 */
	public static CurriculumVitae convert(final File file) throws FileNotFoundException, IOException {
		// Explicit checks instead of assertions: the arguments of a public method
		// must be validated even when the JVM runs without -ea. The exception
		// types match what opening the stream would raise for these inputs.
		if (file == null) {
			throw new NullPointerException("file must not be null");
		}
		if (!file.isFile()) {
			throw new FileNotFoundException(file.getPath() + " is not an existing regular file");
		}
		final String cvContent = extractTextFromPdf(file);
		final CurriculumVitae cv = CurriculumVitae.getNewInstance();
		cv.setContent(cvContent);
		cv.setName(file.getName());
		return cv;
	}
	
	/**
	 * Extracts the text of the given PDF file.
	 * <p>
	 * The input stream, the parser resources and the PDF document are released
	 * in {@code finally} blocks so that they are freed even when parsing or
	 * text extraction fails.
	 * 
	 * @param file the PDF file to read
	 * @return the text returned by {@link PDFTextStripper#getText(PDDocument)}
	 * @throws FileNotFoundException if the file cannot be opened for reading
	 * @throws IOException if the PDF cannot be read or parsed
	 */
	private static String extractTextFromPdf(final File file) throws FileNotFoundException, IOException {
		final FileInputStream input = new FileInputStream(file);
		try {
			final PDFParser parser = new PDFParser(input);
			try {
				parser.parse();
				final PDDocument pdDoc = new PDDocument( parser.getDocument() );
				try {
					// The stripper is created once the document is guarded by the
					// try block, so a failure in its constructor still closes the document.
					final PDFTextStripper pdfStripper = new PDFTextStripper();
					return pdfStripper.getText(pdDoc);
				} finally {
					pdDoc.close();
				}
			} finally {
				// Same release order as before: document first, then the parser.
				parser.clearResources();
			}
		} finally {
			// Closing an already closed stream has no effect, so this is safe
			// whether or not the parser closed it on its own.
			input.close();
		}
	}

}
